Code
/* set DT table fontsizes */
th { font-size: 11px; } /* header font */
td { font-size: 11px; } /* cell font *//* set DT table fontsizes */
th { font-size: 11px; } /* header font */
td { font-size: 11px; } /* cell font */I used the rotterdam dataset from the survival package, which includes 2982 primary breast cancers patients whose records were included in the Rotterdam tumor bank.
This dataset records two events: disease relapse and death. Follow-up time is provided for each, with the origin being the time of the primary surgery.
In the article from which this data stems, the authors restricted their dataset to the 1546 patients who had node-positive disease (“Since the validation dataset comprised only node-positive patients and nodal status is an important prognostic factor, we omitted the node-negative patients to create the derivation dataset”). I decided to also restrict the dataset to only node-positive patients.
See:
Royston, P., Altman, D.G. External validation of a Cox prognostic model: principles and methods. BMC Med Res Methodol 13, 33 (2013). https://doi.org/10.1186/1471-2288-13-33
Quoted paragraph from the article:
“Candidate prognostic variables in the breast cancer datasets were age at primary surgery (age, years), menopausal status (meno, 0 = premenopausal, 1 = postmenopausal), tumour size (size), tumour grade (grade), number of positive lymph nodes (nodes), progesterone receptors (pgr, fmol/l), oestrogen receptors (er, fmol/l), hormonal treatment (hormon, 0 = no, 1 = yes), and chemotherapy (chemo). Tumour size (mm) was not available as a continuous variable in the Rotterdam dataset, therefore a standard coding was used; the base category was ≤ 20 mm and two dummy variables were used, namely 20 to 50 mm (sized1) and > 50 mm (sized2). We excluded grade, since it was measured according to a different protocol in the two datasets, and chemo, since all patients in the validation dataset received chemotherapy.”
# install.packages("pacman")
pacman::p_load(
here,
tidyverse,
survival,
survminer,
ggsurvfit,
janitor,
DT,
flextable,
patchwork # easily combine plots
)
# Custom package with useful function for summarising dataframe
# pak::pak("UEP-HUG/UEPtools")
# Load in the rotterdam dataset from the survival package
rotterdam <- survival::rotterdam |>
# filter out node-negative patients
filter(nodes > 0) # as "nodal status is an important prognostic factor, we omitted the node-negative patients to create the derivation dataset"
# Add in the variable description
rotterdam_variables <- tribble(
~name, ~value,
"pid", "patient identifier",
"year", "year of surgery",
"age", "age at surgery",
"meno", "menopausal status (0= premenopausal, 1= postmenopausal)",
"size", "tumor size, a factor with levels <=20, 20-50, >50",
"grade", "differentiation grade",
"nodes", "number of positive lymph nodes",
"pgr", "progesterone receptors (fmol/l)",
"er", "estrogen receptors (fmol/l)",
"hormon", "hormonal treatment (0=no, 1=yes)",
"chemo", "chemotherapy",
"rtime", "days to relapse or last follow-up",
"recur", "0= no relapse, 1= relapse",
"dtime", "days to death or last follow-up",
"death", "0= alive, 1= dead"
)
# Define a function for generating a clean table from the KM estimates
KM_table <- function(x){
x |> # the survfit2() object
ggsurvfit::tidy_survfit(times = seq(1000,7000,1000)) |>
dplyr::relocate(std.error, .after = conf.low) |>
dplyr::mutate(across(c(estimate:conf.low), ~round(.x,2))) |>
dplyr::mutate(std.error = round(std.error, 4)) |>
dplyr::select(-c(estimate_type:conf.level)) |>
flextable::flextable() |>
flextable::fontsize(size = 9, part = "all") |>
autofit() |>
flextable::theme_zebra()
}
# Define a function for generating the KM curves
KM_plot <- function(x, strataname = NULL){
x |>
ggsurvfit()+
add_censor_mark(size = 1.5, alpha = 0.8) +
add_confidence_interval() +
add_risktable(risktable_stats = c("n.risk", "cum.censor", "cum.event")) +
add_quantile(y_value = 0.5, color = "gray50", linewidth = 0.5) +
scale_ggsurvfit()+
add_pvalue()+
labs(title = "Relapse-free survival after surgery",
color = strataname,
fill = strataname
)
}I went through the package’s documentation for the dataset to also include the descriptions of the variables, and used a custom function to summarise the dataset:
print(paste0("Number of patients analyzed: ", nrow(rotterdam)))[1] "Number of patients analyzed: 1546"
rotterdam |>
UEPtools::metadata_generator_any(variable_names = rotterdam_variables) |>
select(-Num_Values) |>
DT::datatable(
filter = "top",
options = list(
pageLength = 26
),
rownames = FALSE, # set to FALSE for cleaner look
class = 'cell-border stripe hover nowrap compact'
)Create new outcome variable rd, where 0= alive without relapse, 1= relapse or death.
Also create rd_time variable where I calculate days to first of relapse, death or last follow-up
rotterdam_cleaned <- rotterdam |>
mutate(
# Convert several variables to factor
meno = fct_recode(factor(meno), premenopausal = "0", postmenopausal = "1"),
grade = factor(grade),
hormon = fct_recode(factor(hormon), No = "0", Yes = "1"),
chemo = fct_recode(factor(chemo), No = "0", Yes = "1"),
# Add categorical variable for number of nodes
nodes_cat = factor(
case_when(
nodes >0 & nodes <4 ~ "1-3",
nodes >= 4 & nodes < 9 ~ "4-9",
nodes >=9 ~ "9+"
)),
# Add categorical variable for number of estrogen receptors
er_cat = factor(
case_when(
er <41 ~ "0-40",
er >= 41 & er <301 ~ "41-300",
er >=301 ~ "301+"
),levels = c("0-40", "41-300", "301+")),
# Add categorical variable for number of progesterone receptors
pgr_cat = factor(
case_when(
pgr <21 ~ "0-20",
pgr >= 21 & pgr <201 ~ "21-200",
pgr >=201 ~ "201+"
),levels = c("0-20", "21-200", "201+")
),
# `rd` where 0= alive without relapse, 1= relapse or death
rd = case_when(recur==1|death==1 ~ 1, .default = 0),
# Calculate days to first of relapse, death or last follow-up
rd_time = case_when(recur==1 ~ rtime, .default = dtime),
# Add categorical age variable
age_cat = factor(case_when(
age < 65 ~ "24-64",
age >= 65 ~ "65+"))
)
# Update the rotterdam_variables object
rotterdam_variables <- bind_rows(
rotterdam_variables,
tribble(
~name, ~value,
"rd", "0= alive without relapse, 1= relapse or death",
"rd_time", "days to relapse/death or last follow-up",
"age_cat", "age category at surgery")
)p1 <- rotterdam_cleaned |> ggplot()+geom_histogram(aes(x=age))+labs(x="age at surgery")
p2 <- rotterdam_cleaned |> ggplot()+geom_bar(aes(x=size))+labs(x="tumor size")
p3 <- rotterdam_cleaned |> ggplot()+geom_histogram(aes(x=nodes))+labs(x="number of positive lymph nodes")
p4 <- rotterdam_cleaned |> ggplot()+geom_histogram(aes(x=pgr))+labs(x="progesterone receptors (fmol/l)")
p5 <- rotterdam_cleaned |> ggplot()+geom_histogram(aes(x=er))+labs(x="estrogen receptors (fmol/l)")
p6 <- rotterdam_cleaned |> ggplot()+geom_bar(aes(x=hormon))+labs(x="hormonal treatment")
p7 <- rotterdam_cleaned |> ggplot()+geom_bar(aes(x=meno))+labs(x="menopausal status")
p8 <- rotterdam_cleaned |> ggplot()+geom_histogram(aes(x=rd_time))+labs(x="days to relapse/death or last follow-up")
p1+p2+p3+p4+p5+p6+p7+p8+ plot_layout(ncol = 2)Relapse-free survival probability at different timepoints:
# Fit a Surv-type object for right-censored data
rotterdam_surv_fit_rd <-
# survival::survfit(Surv(rd_time, rd) ~ 1, data = rotterdam_cleaned)
# Using survfit2 from the ggsurvfit package allows easier control for
# downstream analysis using the ggsurvfit() function:
ggsurvfit::survfit2(Surv(rd_time, rd) ~ 1, data = rotterdam_cleaned)
# Print its summary at specific times
rotterdam_surv_fit_rd |> KM_table()time | n.risk | n.event | n.censor | cum.event | cum.censor | estimate | conf.high | conf.low | std.error |
|---|---|---|---|---|---|---|---|---|---|
1,000 | 928 | 608 | 10 | 608 | 10 | 0.61 | 0.63 | 0.58 | 0.0205 |
2,000 | 573 | 289 | 66 | 897 | 76 | 0.41 | 0.44 | 0.39 | 0.0307 |
3,000 | 321 | 105 | 147 | 1,002 | 223 | 0.33 | 0.35 | 0.30 | 0.0384 |
4,000 | 112 | 62 | 147 | 1,064 | 370 | 0.24 | 0.27 | 0.22 | 0.0547 |
5,000 | 24 | 14 | 74 | 1,078 | 444 | 0.19 | 0.22 | 0.15 | 0.0970 |
6,000 | 3 | 2 | 19 | 1,080 | 463 | 0.14 | 0.22 | 0.09 | 0.2196 |
7,000 | 1 | 0 | 2 | 1,080 | 465 | 0.14 | 0.22 | 0.09 | 0.2196 |
Restricted Mean Survival Time (RMST): Restricted Mean Survival Time (RMST) is a statistical measure used in survival analysis that offers a clinically meaningful way to compare survival outcomes between groups. It represents the average event-free survival time up to a pre-specified time point, and is defined as the area under the survival curve from time zero up to a specific, pre-defined time point. (using the survival package, I can only print the RMST for single groups or strata, but for more detailed comparisons between groups (e.g. getting the difference in RMST with confidence intervals) I could try more specialised packages, e.g. survRM2.
“The RMST may provide valuable information for comparing two survival curves when the proportional hazards assumption is not met, such as in cases of crossing or delayed separation of survival curves.”
- Han K, Jung I. Restricted Mean Survival Time for Survival Analysis: A Quick Guide for Clinical Researchers. Korean J Radiol. 2022 May;23(5):495-499. doi: 10.3348/kjr.2022.0061
For this analysis, I’ve set the restriction time at 5 years (1,825 days):
print(rotterdam_surv_fit_rd, rmean = 365*5, print.rmean = TRUE)Call: survfit(formula = Surv(rd_time, rd) ~ 1, data = rotterdam_cleaned)
n events rmean* se(rmean) median 0.95LCL 0.95UCL
[1,] 1546 1080 1225 16.1 1441 1301 1583
* restricted mean with upper limit = 1825
rotterdam_surv_fit_rd |>
ggsurvfit(color = "tomato")+
add_censor_mark(color = "tomato", size = 1.5, alpha = 0.8) +
add_confidence_interval(fill = "tomato") +
add_risktable(risktable_stats = c("n.risk", "cum.censor", "cum.event")) +
add_quantile(y_value = 0.5, color = "gray60", linewidth = 0.5) +
scale_ggsurvfit()+
labs(title = "Relapse-free survival after surgery")Survival probabilities:
rotterdam_surv_fit_meno <- ggsurvfit::survfit2(Surv(rd_time, rd) ~ meno, data = rotterdam_cleaned)
rotterdam_surv_fit_meno |> KM_table() |> hline(7)time | n.risk | n.event | n.censor | cum.event | cum.censor | estimate | conf.high | conf.low | std.error | strata |
|---|---|---|---|---|---|---|---|---|---|---|
1,000 | 524 | 386 | 8 | 386 | 8 | 0.58 | 0.61 | 0.55 | 0.0282 | postmenopausal |
2,000 | 305 | 181 | 38 | 567 | 46 | 0.37 | 0.41 | 0.34 | 0.0433 | postmenopausal |
3,000 | 162 | 68 | 75 | 635 | 121 | 0.28 | 0.31 | 0.25 | 0.0561 | postmenopausal |
4,000 | 51 | 37 | 74 | 672 | 195 | 0.19 | 0.23 | 0.16 | 0.0848 | postmenopausal |
5,000 | 7 | 11 | 33 | 683 | 228 | 0.12 | 0.17 | 0.08 | 0.1896 | postmenopausal |
6,000 | 1 | 1 | 5 | 684 | 233 | 0.06 | 0.25 | 0.01 | 0.7321 | postmenopausal |
7,000 | 0 | 0 | 1 | 684 | 234 | postmenopausal | ||||
1,000 | 404 | 222 | 2 | 222 | 2 | 0.65 | 0.68 | 0.61 | 0.0296 | premenopausal |
2,000 | 268 | 108 | 28 | 330 | 30 | 0.47 | 0.51 | 0.43 | 0.0428 | premenopausal |
3,000 | 159 | 37 | 72 | 367 | 102 | 0.40 | 0.44 | 0.36 | 0.0515 | premenopausal |
4,000 | 61 | 25 | 73 | 392 | 175 | 0.32 | 0.36 | 0.28 | 0.0692 | premenopausal |
5,000 | 17 | 3 | 41 | 395 | 216 | 0.28 | 0.34 | 0.23 | 0.1002 | premenopausal |
6,000 | 2 | 1 | 14 | 396 | 230 | 0.26 | 0.33 | 0.20 | 0.1327 | premenopausal |
7,000 | 1 | 0 | 1 | 396 | 231 | 0.26 | 0.33 | 0.20 | 0.1327 | premenopausal |
Restricted Mean Survival Time: Over 5 years (1825 days)
# Print mean survival time
print(rotterdam_surv_fit_meno, rmean = 365*5, print.rmean = TRUE)Call: survfit(formula = Surv(rd_time, rd) ~ meno, data = rotterdam_cleaned)
n events rmean* se(rmean) median 0.95LCL 0.95UCL
meno=premenopausal 628 396 1298 24.1 1749 1510 2141
meno=postmenopausal 918 684 1174 21.4 1275 1172 1442
* restricted mean with upper limit = 1825
Over 10 years (3650 days)
# Print mean survival time
print(rotterdam_surv_fit_meno, rmean = 365*10, print.rmean = TRUE)Call: survfit(formula = Surv(rd_time, rd) ~ meno, data = rotterdam_cleaned)
n events rmean* se(rmean) median 0.95LCL 0.95UCL
meno=premenopausal 628 396 2046 55.2 1749 1510 2141
meno=postmenopausal 918 684 1733 44.4 1275 1172 1442
* restricted mean with upper limit = 3650
KM curve:
rotterdam_surv_fit_meno |> KM_plot()Use a log-rank test, which accounts for the whole follow-up period. By comparing the observed number of events in each group with the number that would be expected if event rates were the same, a chi-squared test is used to determine if any observed differences are statistically meaningful.
survdiff(Surv(rd_time, rd) ~ meno, rho = 0, data = rotterdam_cleaned)Call:
survdiff(formula = Surv(rd_time, rd) ~ meno, data = rotterdam_cleaned,
rho = 0)
N Observed Expected (O-E)^2/E (O-E)^2/V
meno=premenopausal 628 396 479 14.4 26
meno=postmenopausal 918 684 601 11.5 26
Chisq= 26 on 1 degrees of freedom, p= 3e-07
Numeric covariates are categorized here only to aid visualisation
rotterdam_surv_fit_age_cat <- survfit2(Surv(rd_time, rd) ~ age_cat, data = rotterdam_cleaned)
rotterdam_surv_fit_age_cat |> KM_plot(strataname = "Age category")rotterdam_surv_fit_size <- survfit2(Surv(rd_time, rd) ~ size, data = rotterdam_cleaned)
rotterdam_surv_fit_size |> KM_plot(strataname = "Tumor size")rotterdam_surv_fit_hormon <- survfit2(Surv(rd_time, rd) ~ hormon, data = rotterdam_cleaned)
rotterdam_surv_fit_hormon |> KM_plot(strataname = "Hormonal treatment")rotterdam_surv_fit_nodes_cat <- survfit2(Surv(rd_time, rd) ~ nodes_cat, data = rotterdam_cleaned)
rotterdam_surv_fit_nodes_cat |> KM_plot(strataname = "Number of nodes")rotterdam_surv_fit_pgr_cat <- survfit2(Surv(rd_time, rd) ~ pgr_cat, data = rotterdam_cleaned)
rotterdam_surv_fit_pgr_cat |> KM_plot(strataname = "progesterone receptors (fmol/l)")rotterdam_surv_fit_er_cat <- survfit2(Surv(rd_time, rd) ~ er_cat, data = rotterdam_cleaned)
rotterdam_surv_fit_er_cat |> KM_plot(strataname = "estrogen receptors (fmol/l)")This is the full model using the variables specified in the original article (see section “Prognostic factors” above)
rotterdam_cox <-
coxph(Surv(rd_time, rd) ~ age + meno + size + nodes + pgr + er + hormon,
data = rotterdam_cleaned
)
summary(rotterdam_cox)Call:
coxph(formula = Surv(rd_time, rd) ~ age + meno + size + nodes +
pgr + er + hormon, data = rotterdam_cleaned)
n= 1546, number of events= 1080
coef exp(coef) se(coef) z Pr(>|z|)
age 0.0045330 1.0045433 0.0039914 1.136 0.25608
menopostmenopausal 0.2418071 1.2735485 0.1065947 2.268 0.02330 *
size20-50 0.2855600 1.3305069 0.0738251 3.868 0.00011 ***
size>50 0.6124397 1.8449269 0.0941898 6.502 7.92e-11 ***
nodes 0.0558619 1.0574516 0.0052392 10.662 < 2e-16 ***
pgr -0.0002002 0.9997998 0.0001237 -1.618 0.10560
er -0.0002603 0.9997398 0.0001250 -2.082 0.03733 *
hormonYes -0.3228457 0.7240856 0.0809431 -3.989 6.65e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
age 1.0045 0.9955 0.9967 1.0124
menopostmenopausal 1.2735 0.7852 1.0334 1.5695
size20-50 1.3305 0.7516 1.1513 1.5376
size>50 1.8449 0.5420 1.5339 2.2190
nodes 1.0575 0.9457 1.0466 1.0684
pgr 0.9998 1.0002 0.9996 1.0000
er 0.9997 1.0003 0.9995 1.0000
hormonYes 0.7241 1.3811 0.6179 0.8486
Concordance= 0.66 (se = 0.008 )
Likelihood ratio test= 235.5 on 8 df, p=<2e-16
Wald test = 266.1 on 8 df, p=<2e-16
Score (logrank) test = 277.2 on 8 df, p=<2e-16
survminer::ggforest(rotterdam_cox, data = rotterdam_cleaned)rotterdam_test <- cox.zph(rotterdam_cox)
rotterdam_test chisq df p
age 4.05333 1 0.04408
meno 0.00248 1 0.96026
size 13.17300 2 0.00138
nodes 6.73579 1 0.00945
pgr 15.02219 1 0.00011
er 24.55857 1 7.2e-07
hormon 2.99857 1 0.08334
GLOBAL 61.45472 8 2.4e-10
The ‘GLOBAL’ test appears to show that the PH assumption is violated for the overall model, and also appears to be violated for the age, size, nodes, and er variables.
wrap_plots(survminer::ggcoxzph(rotterdam_test), ncol =2)While the results of the test of the proportional hazards assumption imply violation of the proportional hazards assumption, after visually inspecting the Schoenfeld residuals the validity of the assumption may still be argued to hold. Still, next steps may involve some combination of transformation of variables and/or consideration of including time-dependent interaction terms.
We could additionally further explore comparing estimates of Restricted Mean Survival Time (RMST), e.g. see:
Han K, Jung I. Restricted Mean Survival Time for Survival Analysis: A Quick Guide for Clinical Researchers. Korean J Radiol. 2022 May;23(5):495-499. doi: 10.3348/kjr.2022.0061
“We conclude that the hazard ratio cannot be recommended as a general measure of the treatment effect in a randomized controlled trial, nor is it always appropriate when designing a trial. Restricted mean survival time may provide a practical way forward and deserves greater attention.”
- Royston, P., Parmar, M.K. Restricted mean survival time: an alternative to the hazard ratio for the design and analysis of randomized trials with a time-to-event outcome. BMC Med Res Methodol 13, 152 (2013). https://doi.org/10.1186/1471-2288-13-152